#!/usr/bin/env Rscript

# Load necessary libraries
library(dplyr)
library(tidyr)
library(purrr)

# Read the two input files; stringsAsFactors = FALSE keeps text columns as
# character so the factor conversion below is explicit.
data1 <- read.csv("external.csv", stringsAsFactors = FALSE)
data2 <- read.csv("internal.csv", stringsAsFactors = FALSE)

# Stack both tables row-wise, then turn the three identifier columns into
# factors so they act as grouping variables in the tests that follow.
combined_data <- bind_rows(data1, data2) %>%
  mutate(
    question = as.factor(question),
    participant = as.factor(participant),
    tool = as.factor(tool)
  )

# Kruskal-Wallis test per tool: does `response` differ between questions?
#
# Result: one row per tool with columns `tool` and `kruskal_p_value`.
# The p-value is NA when fewer than two questions have usable data for that
# tool, because the test is undefined for a single group.
kruskal_results <- combined_data %>%
  group_by(tool) %>%
  summarize(
    kruskal_p_value = {
      # kruskal.test() drops incomplete rows before testing, so the number of
      # groups must be counted on complete rows as well; otherwise a question
      # whose responses are all missing would pass the guard and make the
      # test stop with "all observations are in the same group".
      usable <- !is.na(response) & !is.na(question)
      if (n_distinct(question[usable]) > 1) {
        kruskal.test(response[usable], question[usable])$p.value
      } else {
        NA_real_ # typed NA keeps the column numeric for every tool
      }
    }
  )

# Print Kruskal-Wallis test results
print("Kruskal-Wallis Test Results:")
print(kruskal_results)

# Post-hoc pairwise Wilcoxon rank-sum tests between questions, run only for
# tools whose Kruskal-Wallis test (see `kruskal_results`) is significant.
#
# Result: one row per (tool, question pair) with columns `tool`,
# `question_1`, `question_2` and `wilcox_p_value` (Bonferroni-adjusted).
# Tools that are not significant, or that have fewer than two questions,
# contribute no rows.

# Significance threshold applied to the Kruskal-Wallis p-values
posthoc_alpha <- 0.05

# Tools with a defined, significant omnibus p-value (NA/NaN are excluded)
significant_tools <- kruskal_results %>%
  filter(!is.na(kruskal_p_value), kruskal_p_value < posthoc_alpha) %>%
  pull(tool)

# Run the pairwise tests for the rows of a single tool.
#
# tool_data: rows of `combined_data` for one tool (needs `response` and
#            `question`).
# tool_key:  one-row table holding the tool; unused, but group_modify()
#            always passes it as the second argument.
# Returns a tibble with one row per compared pair of questions.
run_pairwise_wilcox <- function(tool_data, tool_key) {
  if (n_distinct(tool_data$question) < 2) {
    # Nothing to compare; return the same columns with zero rows so the
    # per-tool results can always be combined.
    return(tibble(
      question_1 = character(),
      question_2 = character(),
      wilcox_p_value = double()
    ))
  }

  # Report warnings but keep the computed result. A tryCatch() warning
  # handler would abandon the test as soon as the first warning is raised.
  test <- withCallingHandlers(
    pairwise.wilcox.test(
      tool_data$response,
      tool_data$question,
      p.adjust.method = "bonferroni",
      exact = FALSE
    ),
    warning = function(w) {
      message("Warning: ", conditionMessage(w))
      invokeRestart("muffleWarning")
    }
  )

  # `p.value` is a lower-triangular matrix: rows are the 2nd..last question
  # levels, columns the 1st..second-to-last. Only the lower triangle
  # (diagonal included) holds comparisons; the rest is structural NA.
  p_matrix <- test$p.value
  compared <- lower.tri(p_matrix, diag = TRUE)
  tibble(
    question_1 = colnames(p_matrix)[col(p_matrix)[compared]],
    question_2 = rownames(p_matrix)[row(p_matrix)[compared]],
    wilcox_p_value = p_matrix[compared]
  )
}

pairwise_wilcox_results <- combined_data %>%
  filter(tool %in% significant_tools) %>%
  group_by(tool) %>%
  group_modify(run_pairwise_wilcox) %>%
  ungroup()

# Print pairwise Wilcoxon test results
print("Pairwise Wilcoxon Test Results:")
print(pairwise_wilcox_results)
